# NOTE: the workspace is deliberately NOT cleared here.
# `rm(list = ls())` only removes user objects from the global environment; it
# leaves attached packages, options() and the working directory untouched, so
# it does not provide a clean, reproducible starting state. It also silently
# destroys the session of anyone who source()s this file.
# Run the script in a fresh R session instead (e.g. via `Rscript`).
library(data.table)
library(plyr)
library(tidyr)
library(lfe)
library(stargazer)
library(xtable)
library(sandwich)
library(roll)
library(readxl)
library(readr)
library(zoo)
library(texreg)
library(DescTools)

# Load the master data workbook into a data.table. The first spreadsheet row
# is skipped, and up to 10,000 rows are inspected when guessing column types.
m <- data.table(
  read_xlsx("../Data/MasterData.xlsx", skip = 1, guess_max = 1e4)
)

# Derived respondent characteristics.
#   age        : 2021 minus `year`
#                NOTE(review): presumably `year` is the birth year and 2021 the
#                reference year -- the constant is hard-coded, confirm.
#   experience : divided by 12
#                NOTE(review): presumably converts months to years -- confirm.
m[, `:=`(
  age = 2021 - year,
  experience = experience / 12
)]

# Recalled returns at the four horizons, multiplied by 100
# (presumably fractions -> percent; confirm against the raw data).
m[, `:=`(
  retrecall_1day_rate = retrecall_1day_rate * 100,
  retrecall_30day_rate = retrecall_30day_rate * 100,
  retrecall_1year_rate = retrecall_1year_rate * 100,
  retrecall_5year_rate = retrecall_5year_rate * 100
)]

# Expected returns (market and own portfolio, 30-day and 1-year horizons),
# multiplied by 100 in the same way.
m[, `:=`(
  expret30day_market_rate = expret30day_market_rate * 100,
  expret1year_market_rate = expret1year_market_rate * 100,
  expret30day_self_rate = expret30day_self_rate * 100,
  expret1year_self_rate = expret1year_self_rate * 100
)]

###########################################################################
# Explain expectation
###########################################################################

# Returns over the memorable period, multiplied by 100.
# NOTE(review): `pret` / `aret` presumably denote the perceived and the actual
# return -- confirm against the codebook.
m[, `:=`(
  memrableperiod_pret = memrableperiod_pret * 100,
  memrableperiod_aret = memrableperiod_aret * 100
)]
# Gap between the two rescaled returns (must run after the rescaling above).
m[, bias := memrableperiod_pret - memrableperiod_aret]

# Parse the period boundaries, stored as strings such as "2015m06", into
# zoo::yearmon values.
m[, `:=`(
  memrableperiod_begin = as.yearmon(memrableperiod_begin, "%Ym%m"),
  memrableperiod_end = as.yearmon(memrableperiod_end, "%Ym%m")
)]
# Distance from the hard-coded reference point 2022 + 2/12 (on the yearmon
# scale) back to the midpoint of the memorable period.
# NOTE(review): the result is presumably measured in years -- confirm.
m[, memrableperiod_dist :=
    as.yearmon(2022 + 2 / 12) - (memrableperiod_begin + memrableperiod_end) / 2]

# 0/1 indicators keyed on the first character of each categorical answer
# (presumably the option letter of the survey question -- confirm).
# NOTE(review): `%in%` never returns NA, so a missing answer is coded 0 for
# every dummy below except `dummy_female`, which is built with `==` and
# therefore stays NA. Confirm that this asymmetry is intended.
m[, `:=`(
  dummy_female = ifelse(substr(gender, 1, 1) == "B", 1, 0),
  dummy_college = ifelse(substr(education, 1, 1) %in% c("F", "G"), 1, 0),
  dummy_wealth1m = ifelse(
    substr(total_wealth, 1, 1) %in% c("F", "G", "H"), 1, 0
  ),
  dummy_income200k = ifelse(
    substr(total_income, 1, 1) %in% c("D", "E", "F", "G", "H"), 1, 0
  ),
  dummy_accountcheck_often = ifelse(
    substr(accountcheck_freq, 1, 1) %in% c("A", "B"), 1, 0
  ),
  dummy_newscheck_often = ifelse(
    substr(newscheck_freq, 1, 1) %in% c("A", "B"), 1, 0
  ),
  dummy_discussion_often = ifelse(
    substr(discussion_freq, 1, 1) %in% c("A", "B"), 1, 0
  ),
  dummy_num_wechat = ifelse(
    substr(num_wechat, 1, 1) %in% c("C", "D", "E"), 1, 0
  )
)]

# All five models share the same structure:
#   * estimated on the rows of `m` with type == 0;
#   * brokername, city and date are absorbed as fixed effects (2nd formula part);
#   * no instruments (3rd part is 0);
#   * standard errors are clustered by date (4th part).
# The order of the regressors is kept fixed across models because the
# positional covariate labels in the results table depend on it.

# (1) Distance to the memorable period; no experience control.
f1 <- felm(
  memrableperiod_dist ~
    age +
    dummy_female + dummy_college +
    dummy_wealth1m + dummy_income200k +
    dummy_accountcheck_often + dummy_newscheck_often +
    dummy_discussion_often + dummy_num_wechat +
    agree + extrv + consc + neuro + opene |
    brokername + city + date |
    0 |
    date,
  data = m[type == 0]
)

# (2) As (1), adding experience.
f2 <- felm(
  memrableperiod_dist ~
    age + experience +
    dummy_female + dummy_college +
    dummy_wealth1m + dummy_income200k +
    dummy_accountcheck_often + dummy_newscheck_often +
    dummy_discussion_often + dummy_num_wechat +
    agree + extrv + consc + neuro + opene |
    brokername + city + date |
    0 |
    date,
  data = m[type == 0]
)

# (3) memrableperiod_pret as outcome; no experience control.
f3 <- felm(
  memrableperiod_pret ~
    age +
    dummy_female + dummy_college +
    dummy_wealth1m + dummy_income200k +
    dummy_accountcheck_often + dummy_newscheck_often +
    dummy_discussion_often + dummy_num_wechat +
    agree + extrv + consc + neuro + opene |
    brokername + city + date |
    0 |
    date,
  data = m[type == 0]
)

# (4) As (3), adding experience.
f4 <- felm(
  memrableperiod_pret ~
    age + experience +
    dummy_female + dummy_college +
    dummy_wealth1m + dummy_income200k +
    dummy_accountcheck_often + dummy_newscheck_often +
    dummy_discussion_often + dummy_num_wechat +
    agree + extrv + consc + neuro + opene |
    brokername + city + date |
    0 |
    date,
  data = m[type == 0]
)

# (5) As (4), additionally controlling for the distance to the period.
f5 <- felm(
  memrableperiod_pret ~
    age + experience + memrableperiod_dist +
    dummy_female + dummy_college +
    dummy_wealth1m + dummy_income200k +
    dummy_accountcheck_often + dummy_newscheck_often +
    dummy_discussion_often + dummy_num_wechat +
    agree + extrv + consc + neuro + opene |
    brokername + city + date |
    0 |
    date,
  data = m[type == 0]
)

# Print the five models side by side as a regression table (stargazer's
# default output type is used, since `type` is not set).
# `covariate.labels` is matched by position, not by name.
# NOTE(review): check the printed table to confirm each label lines up with
# the intended coefficient whenever a model specification changes.
stargazer(
  f1, f2, f3, f4, f5,
  title = "",
  covariate.labels = c(
    "Age",
    "Experience",
    "Distance",
    "Female",
    "College",
    "Wealth>1M",
    "Income>200K",
    "Often check account",
    "Often check news",
    "Often discuss",
    "Many Wechat groups",
    "Agreeableness",
    "Extraversion",
    "Conscientiousness",
    "Neuroticism",
    "Openness"
  ),
  dep.var.labels.include = TRUE,
  # Drop log-likelihood, residual standard error and F statistic rows.
  omit.stat = c("LL", "ser", "F"),
  digits = 2,
  align = TRUE,
  single.row = FALSE,
  no.space = TRUE,
  column.sep.width = "0pt",
  ord.intercepts = FALSE
)